#   LibAut
#       joint onset & outcome regressions, with missing data imputation
#       Table C2
#   Juraj Medzihorsky
#   2021-08-14

library(Amelia)
library(GJRM)
library(parallel)
options(mc.cores=2)
options(stringsAsFactors=F)
source('helpers.R')

#   sessionInfo()
##  R version 3.6.3 (2020-02-29)
##  Platform: x86_64-pc-linux-gnu (64-bit)
##  Running under: Ubuntu 16.04.7 LTS
##
##  Matrix products: default
##  BLAS/LAPACK: /opt/OpenBLAS/lib/libopenblas_haswellp-r0.3.9.so
##
##  locale:
##   [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8
##   [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_GB.UTF-8       LC_NAME=C
##   [9] LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
##  attached base packages:
##  [1] parallel  stats     graphics  grDevices utils     datasets  methods   base
##
##  other attached packages:
##  [1] GJRM_0.2       mgcv_1.8-28    nlme_3.1-152   Amelia_1.7.5   Rcpp_1.0.7     nvimcom_0.9-28 colorout_1.2-0
##
##  loaded via a namespace (and not attached):
##   [1] ADGofTest_0.3       mvtnorm_1.0-8       lattice_0.20-44     gmp_0.5-13.2        assertthat_0.2.1
##   [6] psych_1.8.10        foreach_1.4.4       utf8_1.2.1          R6_2.5.0            distr_2.7.0
##  [11] magic_1.5-9         stats4_3.6.3        pcaPP_1.9-73        survey_3.34         ggplot2_3.3.3
##  [16] pillar_1.6.1        rlang_0.4.11        scam_1.2-6          Matrix_1.3-4        splines_3.6.3
##  [21] foreign_0.8-76      munsell_0.5.0       compiler_3.6.3      numDeriv_2016.8-1.1 pkgconfig_2.0.3
##  [26] gsl_1.9-10.3        mnormt_1.5-5        gamlss.dist_5.1-1   evd_2.3-3           tidyselect_1.1.1
##  [31] tibble_3.1.2        VineCopula_2.1.8    matrixcalc_1.0-3    codetools_0.2-18    matrixStats_0.54.0
##  [36] stabledist_0.7-1    fansi_0.5.0         trustOptim_0.8.6.2  pspline_1.0-18      crayon_1.4.1
##  [41] dplyr_1.0.6         MASS_7.3-54         grid_3.6.3          gtable_0.3.0        lifecycle_1.0.0
##  [46] DBI_1.0.0           magrittr_2.0.1      scales_1.1.1        Rmpfr_0.7-2         doParallel_1.0.14
##  [51] distrEx_2.7.0       ellipsis_0.3.2      vctrs_0.3.8         generics_0.1.0      trust_0.1-7
##  [56] iterators_1.0.10    tools_3.6.3         copula_0.999-19     glue_1.4.2          purrr_0.3.4
##  [61] sfsmisc_1.1-3       network_1.13.0.1    abind_1.4-5         survival_2.44-1.1   colorspace_1.3-2
##  [66] VGAM_1.0-6          startupmsg_0.9.5

#------------------------------------------------------------------------------

code_dir <- getwd()
data_dir <- gsub('code$', 'data', code_dir)
figs_dir <- gsub('code$', 'figures', code_dir)
tabs_dir <- gsub('code$', 'tables', code_dir)

setwd(data_dir)
dat <- readRDS('onset_20210810.rds')
eps <- read.csv('eps.csv')
setwd(code_dir)

dat <- subset(dat, onset_eligible%in%1)

#------------------------------------------------------------------------------
#   get EP shares
tab_dem <- aggregate(eps$dem_ep, by=list(eps$year), FUN=mean, na.rm=T)
tab_aut <- aggregate(eps$aut_ep, by=list(eps$year), FUN=mean, na.rm=T)
names(tab_dem) <- c('year', 'share_dem_ep')
names(tab_aut) <- c('year', 'share_aut_ep')

tab <- merge(tab_dem, tab_aut, by='year')

tab_lag <- tab
tab_lag$year <- tab$year + 1
names(tab_lag)[2:3] <- paste0('lag1_', names(tab)[2:3])

tab_tab <- merge(tab, tab_lag)

dat <- merge(dat, tab_tab, by='year')

#------------------------------------------------------------------------------
#   get outcome

#   recode outcomes
#   epy: suc 1 fail 0
dat$epy <- as.numeric(rep(NA,nrow(dat)))
#   success: 1, 5
dat$epy[dat$dem_ep_outcome%in%c(1,5)] <- 1
#   fail: 2,3,4
dat$epy[dat$dem_ep_outcome%in%c(2,3,4)] <- 0
#   censored: 6
dat$epy[dat$dem_ep_outcome%in%6] <- NA

#   with(dat, table(epy, dem_ep_outcome, useNA='a'))
#   sum(!is.na(dat$epy))

#------------------------------------------------------------------------------

dat$logpop <- log(dat$e_mipopula) 

dat$reg_fac <- as.factor(dat$e_regionpol_6C)

#   with(dat, table(reg_fac, e_regionpol_6C))

reglabs <- c('Eastern Europe & Central Asia',
             'Latin America &the Caribbean',
             'MENA, ISR, TUR',
             'Sub-Saharan Africa',
             'Western Europe, North America, CYP, AUS, NZ',
             'Asia & Pacific')

#------------------------------------------------------------------------------

dat_old <- dat

#   filter out missing
vars_to_go <- 
    c('epy',
      'libaut_start', 
      'year',
      'reg_fac', 
      'lag1_e_migdppcln',  
      'lag1_e_migdpgro',  
      'lag1_logpop', 
      'lag1_v2pepwrsoc',  
      'lag1_v2xeg_eqdr',  
      'lag1_v2xnp_pres',  
      'lag1_v2csprtcpt', 
      'lag1_v2x_polyarchy', 
      'lag1_excl_region_edi',  
      'lag1_e_miinteco',  
      'lag1_e_miinterc', 
      'lag1_share_dem_ep', 
      'lag1_share_aut_ep')  



dat <- dat[, c(vars_to_go, 'country_text_id', 'dem_ep_id')]

#------------------------------------------------------------------------------

bins <- c('lag1_e_miinteco', 'lag1_e_miinterc')#, 'libaut_start') 
cats <- c('country_text_id', 'dem_ep_id', 'reg_fac')


set.seed(123)
system.time(
    a <- amelia(dat, m=100, polytime=3, 
                idvars=c('dem_ep_id', 'libaut_start', 'epy'), 
                ts='year', cs='country_text_id', noms=c(bins, 'reg_fac'))
)


#------------------------------------------------------------------------------
#   fit

#   formulas
y_0 <- epy ~ 1
s_0 <- libaut_start ~ 1
#
lin_1 <- ~ . + 
    reg_fac +
    lag1_e_migdppcln + 
    lag1_e_migdpgro + 
    lag1_logpop +
    lag1_v2pepwrsoc + 
    lag1_v2xeg_eqdr + 
    lag1_v2xnp_pres + 
    lag1_v2csprtcpt +
    lag1_v2x_polyarchy +
    lag1_excl_region_edi + 
    lag1_e_miinteco + 
    lag1_e_miinterc 
#
lin_year <- ~ . + I((1e-1*(year-1990))) + I((1e-1*(year-1990))^2) + I((1e-1*(year-1990))^3) 
gam_year <- ~ . + s(year, bs="gp")
#
lin_sel <- ~ . + lag1_share_dem_ep + lag1_share_aut_ep
gam_sel <- ~ . + s(lag1_share_dem_ep, bs="gp") + s(lag1_share_aut_ep, bs="gp")
#
y_l_1 <- update(y_0, lin_1)
y_l_2 <- update(y_l_1, lin_year)

s_l_1 <- update(update(s_0, lin_1), lin_sel)
s_l_2 <- update(update(s_l_1, lin_year), lin_sel)

mod1 <- 
    function(my_dat)
    {
        m <- gjrm(list(s_l_1, y_l_1),
                  data=my_dat, Model='BSS',
                  gamlssfit=T, extra.regI='t', 
                  margin=c('logit', 'logit'), BivD='N')
        return(m)
    }

mod2 <- 
    function(my_dat)
    {
        m <- gjrm(list(s_l_2, y_l_2),
                  data=my_dat, Model='BSS',
                  gamlssfit=T, extra.regI='t', 
                  margin=c('logit', 'logit'), BivD='N')
        return(m)
    }


system.time(run1 <- lapply(a$imputations, mod1))
system.time(run2 <- lapply(a$imputations, mod2))


#   sort(names(run1[[1]]))
#   sort(names(summary(run1[[1]])))
#   names(summary(run1[[1]]$gam1))
#   summary(run1[[1]])$theta
#   summary(run1[[1]])$CItheta

bsel1 <- do.call(rbind, lapply(run1, function(x) summary(x$gam1)$p.coeff))
bsel2 <- do.call(rbind, lapply(run2, function(x) summary(x$gam1)$p.coeff))
bout1 <- do.call(rbind, lapply(run1, function(x) summary(x$gam2)$p.coeff))
bout2 <- do.call(rbind, lapply(run2, function(x) summary(x$gam2)$p.coeff))

esel1 <- do.call(rbind, lapply(run1, function(x) summary(x$gam1)$se))
esel2 <- do.call(rbind, lapply(run2, function(x) summary(x$gam1)$se))
eout1 <- do.call(rbind, lapply(run1, function(x) summary(x$gam2)$se))
eout2 <- do.call(rbind, lapply(run2, function(x) summary(x$gam2)$se))


#   mi.meld(bsel1, esel1)$se.mi

bb <- matrix(ncol=4, nrow=ncol(bsel2))
bb[,] <- NA
rownames(bb) <- colnames(bsel2)
bb[colnames(bsel1),1] <- mi.meld(bsel1, esel1)$q.mi
bb[colnames(bout1),2] <- mi.meld(bout1, eout1)$q.mi
bb[colnames(bsel2),3] <- mi.meld(bsel2, esel2)$q.mi
bb[colnames(bout2),4] <- mi.meld(bout2, eout2)$q.mi


bt1 <- paste(apply(round(bb[,1:2],2), 1, paste, collapse=' & '), '&&')
bt2 <- paste(apply(round(bb[,3:4],2), 1, paste, collapse=' & '), '\\\\')
bt <- apply(cbind(bt1,bt2), 1, paste, collapse='')

getci <-
    function(x)
    {
        ci <- round(data.frame(lo=x[,1]-1.96*x[,2], hi=x[,1]+1.96*x[,2]),2)
        y <- apply(ci, 1, paste0, collapse=', ')
        r <- paste0('(',y,')')
        names(r) <- rownames(ci)
        return(r)
    }

#   getci(ms1)

cmondude <-
    function(x)
    {
        z <- do.call(cbind, lapply(x, as.vector))
        rownames(z) <- colnames(x[[1]])
        return(z)
    }

cc <- matrix(ncol=4, nrow=ncol(bsel2))
cc[,] <- NA
rownames(cc) <- colnames(bsel2)
cc[colnames(bsel1),1] <- getci(cmondude(mi.meld(bsel1, esel1)))
cc[colnames(bout1),2] <- getci(cmondude(mi.meld(bout1, eout1)))
cc[colnames(bsel2),3] <- getci(cmondude(mi.meld(bsel2, esel2)))
cc[colnames(bout2),4] <- getci(cmondude(mi.meld(bout2, eout2)))

ct1 <- paste(apply(cc[,1:2], 1, paste, collapse=' & '), '&&')
ct2 <- paste(apply(cc[,3:4], 1, paste, collapse=' & '), '\\\\')
ct <- apply(cbind(ct1,ct2), 1, paste, collapse='')


thetas <- paste0('$\\theta$ & \\multicolumn{2}{c}{', 
                 round(mean(sapply(run1, function(x) summary(x)$theta)), 2), 
                 '} && \\multicolumn{2}{c}{',
                 round(mean(sapply(run2, function(x) summary(x)$theta)), 2), 
                 '}\\\\') 

citheta <- paste0(' & \\multicolumn{2}{c}{(', 
                  paste0(round(mean(sapply(run1, function(x) summary(x)$CItheta[1])), 2), ', '),
                  paste0(round(mean(sapply(run1, function(x) summary(x)$CItheta[2])), 2), ')'),
                  '} && \\multicolumn{2}{c}{(',
                  paste0(round(mean(sapply(run2, function(x) summary(x)$CItheta[1])), 2), ', '),
                  paste0(round(mean(sapply(run2, function(x) summary(x)$CItheta[2])), 2), ')'),
                  '}\\\\') 

yearline <- 'Year cubic polynomial & \\multicolumn{2}{c}{No} && \\multicolumn{2}{c}{Yes}\\\\'




tab <- character(length=4*length(bt))
for (i in 1:length(bt)) {
    ii <- (i-1)*3 + i
    tab[ii] <- rownames(bb)[i]
    tab[ii+1] <- paste0(' & ', bt[i])
    tab[ii+2] <- paste0(' & ', ct[i])
}
tab <- c(tab, yearline, thetas, citheta)
tab <- gsub('-', '$-$', tab)
tab <- gsub('NA', '', tab)

#------------------------------------------------------------------------------
#   save

setwd(tabs_dir)
writeLines(tab, con='Table_C2.tex')

#   SCRIPT END